library(readr)
Beers <- read_csv("Beers.csv")## Parsed with column specification:
## cols(
## Name = col_character(),
## Beer_ID = col_double(),
## ABV = col_double(),
## IBU = col_double(),
## Brewery_id = col_double(),
## Style = col_character(),
## Ounces = col_double()
## )
head(Beers)## # A tibble: 6 x 7
## Name Beer_ID ABV IBU Brewery_id Style Ounces
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Pub Beer 1436 0.05 NA 409 American Pale Lager 12
## 2 Devil's Cup 2265 0.066 NA 178 American Pale Ale (… 12
## 3 Rise of the P… 2264 0.071 NA 178 American IPA 12
## 4 Sinister 2263 0.09 NA 178 American Double / I… 12
## 5 Sex and Candy 2262 0.075 NA 178 American IPA 12
## 6 Black Exodus 2261 0.077 NA 178 Oatmeal Stout 12
tail(Beers)## # A tibble: 6 x 7
## Name Beer_ID ABV IBU Brewery_id Style Ounces
## <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Rocky Mountain Oy… 1035 0.075 NA 425 American Stout 12
## 2 Belgorado 928 0.067 45 425 Belgian IPA 12
## 3 Rail Yard Ale 807 0.052 NA 425 American Amber … 12
## 4 B3K Black Lager 620 0.055 NA 425 Schwarzbier 12
## 5 Silverback Pale A… 145 0.055 40 425 American Pale A… 12
## 6 Rail Yard Ale (20… 84 0.052 NA 425 American Amber … 12
library(readr)
Breweries <- read_csv("Breweries.csv")## Parsed with column specification:
## cols(
## Brew_ID = col_double(),
## Name = col_character(),
## City = col_character(),
## State = col_character()
## )
head(Breweries)## # A tibble: 6 x 4
## Brew_ID Name City State
## <dbl> <chr> <chr> <chr>
## 1 1 NorthGate Brewing Minneapolis MN
## 2 2 Against the Grain Brewery Louisville KY
## 3 3 Jack's Abby Craft Lagers Framingham MA
## 4 4 Mike Hess Brewing Company San Diego CA
## 5 5 Fort Point Beer Company San Francisco CA
## 6 6 COAST Brewing Company Charleston SC
tail(Breweries)## # A tibble: 6 x 4
## Brew_ID Name City State
## <dbl> <chr> <chr> <chr>
## 1 553 Mickey Finn's Brewery Libertyville IL
## 2 554 Covington Brewhouse Covington LA
## 3 555 Dave's Brewfarm Wilson WI
## 4 556 Ukiah Brewing Company Ukiah CA
## 5 557 Butternuts Beer and Ale Garrattsville NY
## 6 558 Sleeping Lady Brewing Company Anchorage AK
str(Breweries)## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 558 obs. of 4 variables:
## $ Brew_ID: num 1 2 3 4 5 6 7 8 9 10 ...
## $ Name : chr "NorthGate Brewing" "Against the Grain Brewery" "Jack's Abby Craft Lagers" "Mike Hess Brewing Company" ...
## $ City : chr "Minneapolis" "Louisville" "Framingham" "San Diego" ...
## $ State : chr "MN" "KY" "MA" "CA" ...
## - attr(*, "spec")=
## .. cols(
## .. Brew_ID = col_double(),
## .. Name = col_character(),
## .. City = col_character(),
## .. State = col_character()
## .. )
str(Beers)## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 2410 obs. of 7 variables:
## $ Name : chr "Pub Beer" "Devil's Cup" "Rise of the Phoenix" "Sinister" ...
## $ Beer_ID : num 1436 2265 2264 2263 2262 ...
## $ ABV : num 0.05 0.066 0.071 0.09 0.075 0.077 0.045 0.065 0.055 0.086 ...
## $ IBU : num NA NA NA NA NA NA NA NA NA NA ...
## $ Brewery_id: num 409 178 178 178 178 178 178 178 178 178 ...
## $ Style : chr "American Pale Lager" "American Pale Ale (APA)" "American IPA" "American Double / Imperial IPA" ...
## $ Ounces : num 12 12 12 12 12 12 12 12 12 12 ...
## - attr(*, "spec")=
## .. cols(
## .. Name = col_character(),
## .. Beer_ID = col_double(),
## .. ABV = col_double(),
## .. IBU = col_double(),
## .. Brewery_id = col_double(),
## .. Style = col_character(),
## .. Ounces = col_double()
## .. )
names(Breweries)## [1] "Brew_ID" "Name" "City" "State"
names(Beers)## [1] "Name" "Beer_ID" "ABV" "IBU" "Brewery_id"
## [6] "Style" "Ounces"
Beers <- rename(Beers, Brew_ID = Brewery_id)
names(Beers)## [1] "Name" "Beer_ID" "ABV" "IBU" "Brew_ID" "Style" "Ounces"
fullData <- full_join(Breweries, Beers, by = "Brew_ID")head(fullData)## # A tibble: 6 x 10
## Brew_ID Name.x City State Name.y Beer_ID ABV IBU Style Ounces
## <dbl> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 1 NorthGa… Minne… MN Get To… 2692 0.045 50 America… 16
## 2 1 NorthGa… Minne… MN Maggie… 2691 0.049 26 Milk / … 16
## 3 1 NorthGa… Minne… MN Wall's… 2690 0.048 19 English… 16
## 4 1 NorthGa… Minne… MN Pumpion 2689 0.06 38 Pumpkin… 16
## 5 1 NorthGa… Minne… MN Strong… 2688 0.06 25 America… 16
## 6 1 NorthGa… Minne… MN Parape… 2687 0.056 47 Extra S… 16
fullData <- rename(fullData, BreweryName = Name.x, BeerName = Name.y)
names(fullData)## [1] "Brew_ID" "BreweryName" "City" "State" "BeerName"
## [6] "Beer_ID" "ABV" "IBU" "Style" "Ounces"
fullData %>%
group_by(State) %>%
summarise(count = n())%>%
arrange(desc(count))## # A tibble: 51 x 2
## State count
## <chr> <int>
## 1 CO 265
## 2 CA 183
## 3 MI 162
## 4 IN 139
## 5 TX 130
## 6 OR 125
## 7 PA 100
## 8 IL 91
## 9 WI 87
## 10 MA 82
## # … with 41 more rows
BrewStatesC <- Breweries%>%
group_by(State)%>%
summarise(count = n())%>%
arrange(desc(count))head(BrewStatesC)## # A tibble: 6 x 2
## State count
## <chr> <int>
## 1 CO 47
## 2 CA 39
## 3 MI 32
## 4 OR 29
## 5 TX 28
## 6 PA 25
state.freq <- table(Breweries$State)
barplot(state.freq[order(state.freq)],
horiz = T,
border = NA,
xlim = c(0, 100),
main = "Number of Breweries per State",
xlab = "Number of Brewiews",
ylab = "States") #
# Counting the number of brewweries in each state:
head(state.freq)##
## AK AL AR AZ CA CO
## 7 3 2 11 39 47
sDF<-data.frame(state.freq)
sDF<- rename(sDF, State = Var1, Brew_Count=Freq)p1 <- ggplot(Breweries, aes(x = fct_rev(fct_infreq(State)), fill = fct_infreq(State))) + geom_bar() + coord_flip() + theme(axis.title.x=element_blank(), axis.title.y=element_blank(),legend.position = "none")
ggplotly(p1) p2 <- ggplot(fullData, aes(x = fct_rev(fct_infreq(State)), fill = fct_rev(fct_infreq(State)))) + geom_bar() + theme(legend.position = "none") +
labs( title = "", x = "none", y = "Count of Beer") + theme(axis.title.x=element_blank(), axis.title.y=element_blank(),legend.position = "none")
ggplotly(p2)naB_Id<-sum(is.na(fullData$Brew_ID))
naB_Nm<-sum(is.na(fullData$BreweryName))
naB_C<-sum(is.na(fullData$City))
naB_S<-sum(is.na(fullData$State))
naBeer_Nm<-sum(is.na(fullData$BeerName))
naBeer_Id<-sum(is.na(fullData$Beer_ID))
naABV<-sum(is.na(fullData$ABV))
naIBU<-sum(is.na(fullData$IBU))
naStyle<-sum(is.na(fullData$Style))
naOunces<-sum(is.na(fullData$Ounces))
naResults = paste(naB_Id, naB_Nm, naB_C, naB_S, naBeer_Nm, naBeer_Id, naABV, naIBU, naStyle, naOunces, sep = ", ")
print(naResults)## [1] "0, 0, 0, 0, 0, 0, 62, 1005, 5, 0"
names(fullData)## [1] "Brew_ID" "BreweryName" "City" "State" "BeerName"
## [6] "Beer_ID" "ABV" "IBU" "Style" "Ounces"
select(fullData, State, ABV, IBU)## # A tibble: 2,410 x 3
## State ABV IBU
## <chr> <dbl> <dbl>
## 1 MN 0.045 50
## 2 MN 0.049 26
## 3 MN 0.048 19
## 4 MN 0.06 38
## 5 MN 0.06 25
## 6 MN 0.056 47
## 7 KY 0.08 68
## 8 KY 0.125 80
## 9 KY 0.077 25
## 10 KY 0.042 42
## # … with 2,400 more rows
Breweries %>%
group_by(State) %>%
tally(sort = TRUE)## # A tibble: 51 x 2
## State n
## <chr> <int>
## 1 CO 47
## 2 CA 39
## 3 MI 32
## 4 OR 29
## 5 TX 28
## 6 PA 25
## 7 MA 23
## 8 WA 23
## 9 IN 22
## 10 WI 20
## # … with 41 more rows
fullData %>%
group_by(State) %>%
summarise(state_count = n()) %>%
arrange(desc(state_count))## # A tibble: 51 x 2
## State state_count
## <chr> <int>
## 1 CO 265
## 2 CA 183
## 3 MI 162
## 4 IN 139
## 5 TX 130
## 6 OR 125
## 7 PA 100
## 8 IL 91
## 9 WI 87
## 10 MA 82
## # … with 41 more rows
mFD <- fullData %>%
group_by(State) %>%
summarise_each(funs(mean), mean_ABV = ABV, mean_IBU = IBU) %>%
arrange(desc(mean_ABV))## Warning: funs() is soft deprecated as of dplyr 0.8.0
## please use list() instead
##
## # Before:
## funs(name = f(.)
##
## # After:
## list(name = ~f(.))
## This warning is displayed once per session.
ggplot(fullData, aes(x = ABV)) +
geom_bar(na.rm = T) +
theme_grey() +
coord_flip()p11<-ggplot(fullData, aes(x = IBU, fill = "red")) +
geom_bar(na.rm = T) +
scale_color_brewer("Blues")
ggplotly(p11)fullData %>%
group_by(State) %>%
summarise_each(funs(min, max), ABV) ## # A tibble: 51 x 3
## State min max
## <chr> <dbl> <dbl>
## 1 AK 0.048 0.068
## 2 AL 0.05 0.093
## 3 AR 0.04 0.061
## 4 AZ NA NA
## 5 CA NA NA
## 6 CO NA NA
## 7 CT 0.034 0.09
## 8 DC 0.05 0.092
## 9 DE NA NA
## 10 FL NA NA
## # … with 41 more rows
mFDt<- data.frame(fullData %>%
group_by(State) %>%
summarise ( min_ABV =min(ABV), mean_ABV = mean(ABV), max_ABV = max(ABV)) %>%
arrange(desc(max_ABV)))mIBU<- fullData %>%
group_by(State) %>%
summarise (min_IBU =min(IBU), mean_IBU = mean(IBU), max_IBU = max(IBU)) %>%
arrange(desc(max_IBU))fullData %>%
group_by(BreweryName) %>%
summarise ( min_IBU =min(IBU), mean_IBU = mean(IBU), max_IBU = max(IBU)) %>%
arrange(desc(max_IBU))## # A tibble: 551 x 4
## BreweryName min_IBU mean_IBU max_IBU
## <chr> <dbl> <dbl> <dbl>
## 1 Astoria Brewing Company 35 62 138
## 2 Wolf Hills Brewing Company 135 135 135
## 3 Cape Ann Brewing Company 30 60.4 130
## 4 Christian Moerlein Brewing Company 126 126 126
## 5 Buffalo Bayou Brewing Company 20 59.3 118
## 6 Modern Times Beer 30 64.5 115
## 7 Beach Brewing Company 108 108 108
## 8 Seven Brides Brewery 55 80 105
## 9 Alameda Brewing 20 53 103
## 10 Good People Brewing Company 36 58.6 103
## # … with 541 more rows
coef(lm(IBU ~ ABV, data = fullData))## (Intercept) ABV
## -34.09865 1282.03706
ggplot(fullData, aes(ABV, IBU, colour = State)) +
geom_point(na.rm = T) +
geom_abline(intercept = -34.09865, slope = 1282.037)ggplot(fullData, aes(ABV, IBU, colour = ABV)) +
geom_point(aes(colour = ABV), na.rm = T) +
geom_abline(intercept = -34.09865, slope = 1282.037) #### Color by state
ggplot(fullData, aes(ABV, IBU, colour = State)) +
facet_wrap(~ State) +
geom_point(na.rm = T) +
geom_abline(intercept = -34.09865, slope = 1282.037) #### Color by alcohol volume
ggplot(fullData, aes(ABV, IBU, colour = State)) +
facet_wrap(~ State) +
geom_point(aes(colour = ABV),na.rm = TRUE) +
geom_abline(intercept = -34.09865, slope = 1282.037)ggplot(fullData, aes(ABV, IBU, colour = State)) +
facet_wrap(~ Style) +
geom_point(aes(colour = ABV),na.rm = TRUE) +
geom_abline(intercept = -34.09865, slope = 1282.037)ggplot(fullData, aes(State, ABV)) +
geom_boxplot(na.rm = T)ggplot(fullData, aes(State, IBU)) +
geom_boxplot(na.rm = T) ggplot(fullData, aes(ABV, IBU, colour = ABV)) +
geom_point(aes(colour = ABV), na.rm = T) +
geom_abline(intercept = -34.09865, slope = 1282.037) +
geom_smooth(method = lm, se = T, na.rm = T) #####Density map of ABV vs IBU.
dplot<-ggplot(fullData, aes(ABV, IBU, colour = ABV)) +
geom_point( na.rm = T)
dplot + stat_density_2d(aes(fill= ..level.., alpha = 0.75 ) , na.rm = T, geom = "polygon")